import pyreadstat
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, explained_variance_score
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
# Raw data from https://data.cityofchicago.org/ ; only the 2018 trips are used
# (the split by year was done beforehand in SAS).
# NOTE(review): hard-coded, machine-specific working directory — adjust before
# running on another machine.
os.chdir("C:/Chao Doc/My Files/Coursera/")
df, meta = pyreadstat.read_sas7bdat('taxitrip18.sas7bdat')
df.head()

# Calendar features taken from the trip start time.
# isoweekday(): 1 = Monday ... 7 = Sunday.
df['Weekday'] = df['Trip_Start_Timestamp'].apply(lambda ts: ts.isoweekday())
df['Month'] = df['Trip_Start_Timestamp'].apply(lambda ts: ts.month)
df['Hour_Bracket'] = df['Trip_Start_Timestamp'].apply(lambda ts: ts.hour)

# Elapsed time between start and end, in seconds.  total_seconds() is used
# instead of the .seconds attribute: .seconds only holds the sub-day remainder,
# so it under-reports spans of a day or more and wraps negative spans.
df['Duration'] = (df['Trip_End_Timestamp'] - df['Trip_Start_Timestamp']).apply(
    lambda delta: delta.total_seconds()
)
# Community-area number -> region name.  Area 0 is the placeholder that the
# nan_to_num() calls below produce for a missing community area.
# NOTE(review): area 69 is listed under both "South Side" and "Southwest Side";
# the first listing ("South Side") is the one that takes effect.
MapRegion = {
    "Central": [8, 32, 33],
    "North Side": [5, 6, 7, 21, 22],
    "Far North Side": [1, 2, 3, 4, 9, 10, 11, 12, 13, 14, 76, 77],
    "Northwest Side": list(range(15, 21)),
    "West Side": list(range(23, 32)),
    "South Side": [34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 60, 69],
    "Southwest Side": list(range(56, 60)) + list(range(61, 70)),
    "Far Southeast Side": list(range(44, 56)),
    "Far Southwest Side": list(range(70, 76)),
    "Unknown": [0],
}

# Invert the table once so each row costs a single dict lookup instead of a
# scan over every region.  setdefault() keeps the first region that lists an
# area, matching the previous "first match wins" behaviour.
_area_to_region = {}
for _region, _areas in MapRegion.items():
    for _area in _areas:
        _area_to_region.setdefault(_area, _region)

for _side in ('Pickup', 'Dropoff'):
    _area_col = _side + '_Community_Area'
    # Missing areas become 0 so the column can be cast to int.
    df[_area_col] = np.nan_to_num(df[_area_col]).astype(int)
    # An area number absent from MapRegion falls back to "Unknown" rather than
    # aborting the whole run with an IndexError.
    df[_side + '_Region'] = df[_area_col].apply(
        lambda area: _area_to_region.get(area, "Unknown")
    )

# Absolute pickup -> dropoff centroid displacement along each axis.
df['Abs_Diff_Longitude'] = (
    df['Dropoff_Centroid_Longitude'] - df['Pickup_Centroid_Longitude']
).abs()
df['Abs_Diff_Latitude'] = (
    df['Dropoff_Centroid_Latitude'] - df['Pickup_Centroid_Latitude']
).abs()
# (A discarded recomputation of the trip duration used to sit here; it made a
# full pass over the data and threw the result away, so it has been removed.)
df.describe()

# Drop the raw columns that have already been turned into features.  drop()
# returns a new frame, so no separate copy() of the full data set is needed.
df2 = df.drop(
    ['Trip_Start_Timestamp', 'Trip_End_Timestamp',
     'Pickup_Community_Area', 'Dropoff_Community_Area'],
    axis=1,
)
print(df2.isnull().sum())
print('Old size: %d' % len(df2))

# Keep only rows with no missing value in any remaining column.
df3 = df2.dropna()
print(df3.isnull().sum())
print('New size: %d' % len(df3))

# Release the intermediate frames; only df3 is used from here on.
del df2
del df

# Restrict to trips with a total below 50 and Trip_Seconds below 1000.
df3 = df3[(df3['Trip_Total'] < 50) & (df3['Trip_Seconds'] < 1000)]
# Overlaid count histograms (no KDE curve) of every cost component and of the
# trip total, all drawn on one shared axis.
fig_dims = (16, 9)
fig, ax = plt.subplots(figsize=fig_dims)
for cost_column in ('Fare', 'Tips', 'Tolls', 'Extras', 'Trip_Total'):
    sns.distplot(a=df3[cost_column], label=cost_column, ax=ax, kde=False)
# The legend has to be requested explicitly.
ax.legend()
ax.set_xlabel('Trip Costs $USD')
ax.set_title('Histogram of Taxi Costs')
# Box plots of Fare and Trip_Seconds against each grouping variable.
# Each entry: (figure size, x column, y column, axis limits to apply).
# An empty limits dict leaves the axis range at its automatic value.
boxplot_specs = [
    ((16, 9), 'Month', 'Fare', {'ylim': (0, 20)}),
    ((16, 9), 'Month', 'Trip_Seconds', {'ylim': (0, 1000)}),
    ((16, 9), 'Weekday', 'Fare', {'ylim': (0, 20)}),
    ((16, 9), 'Weekday', 'Trip_Seconds', {'ylim': (0, 1000)}),
    ((16, 9), 'Hour_Bracket', 'Fare', {'ylim': (0, 20)}),
    ((16, 9), 'Hour_Bracket', 'Trip_Seconds', {}),
    # Company plots are horizontal (Company on the y axis) and much taller.
    ((32, 60), 'Fare', 'Company', {'xlim': (0, 40)}),
    ((32, 60), 'Trip_Seconds', 'Company', {'xlim': (0, 1000)}),
    ((16, 9), 'Pickup_Region', 'Fare', {'ylim': (0, 30)}),
    ((16, 9), 'Pickup_Region', 'Trip_Seconds', {}),
    ((16, 9), 'Dropoff_Region', 'Fare', {'ylim': (0, 30)}),
    ((16, 9), 'Dropoff_Region', 'Trip_Seconds', {}),
    ((16, 9), 'Payment_Type', 'Fare', {'ylim': (0, 30)}),
    ((16, 9), 'Payment_Type', 'Trip_Seconds', {}),
]
for box_size, x_column, y_column, axis_limits in boxplot_specs:
    plt.figure(figsize=box_size)
    box_ax = sns.boxplot(data=df3, x=x_column, y=y_column)
    if axis_limits:
        box_ax.set(**axis_limits)
# Fare against each of the two duration measures, with a fitted regression line.
for duration_column in ('Trip_Seconds', 'Duration'):
    plt.figure(figsize=(16, 9))
    sns.regplot(x=df3[duration_column], y=df3['Fare'])

# Pairwise correlations between distance, duration and the two price columns
# (the result is only displayed when run interactively).
corr_columns = ['Trip_Miles', 'Trip_Seconds', 'Fare', 'Trip_Total']
df3[corr_columns].corr()
def _plot_hourly_trip_counts(column, label):
    """Print and plot a trip-count table of hour of day against ``column``.

    The table is a plain ``pd.crosstab`` of ``df3['Hour_Bracket']`` (rows)
    against ``df3[column]`` (columns), i.e. each cell is the NUMBER of trips in
    that hour/category combination — not an average cost.

    Parameters
    ----------
    column : str
        Name of the ``df3`` column used for the heatmap's x axis.
    label : str
        Human-readable name of that column, used in the title and x label.

    Returns
    -------
    pandas.DataFrame
        The count table that was plotted.
    """
    counts = pd.crosstab(df3['Hour_Bracket'], df3[column], margins=False)
    print(counts)
    plt.figure(figsize=(32, 20))
    plt.title("Number of Trips, by {} and Hour".format(label))
    sns.heatmap(data=counts, annot=True)
    # Crosstab columns run along x, crosstab rows (hours) along y.
    plt.xlabel(label)
    plt.ylabel("Hour of a Day")
    return counts


WeekHour_crosstab = _plot_hourly_trip_counts('Weekday', 'Weekday')
MonthHour_crosstab = _plot_hourly_trip_counts('Month', 'Month')
PickupAreaHour_crosstab = _plot_hourly_trip_counts('Pickup_Region', 'Pickup Area')
DropoffAreaHour_crosstab = _plot_hourly_trip_counts('Dropoff_Region', 'Dropoff Area')
# Model inputs and target.
features = [
    'Weekday', 'Hour_Bracket', 'Month',
    'Pickup_Region', 'Dropoff_Region',
    'Trip_Miles', 'Trip_Seconds',
    'Abs_Diff_Longitude', 'Abs_Diff_Latitude',
]
X = df3[features]
y = df3['Trip_Total']

# Random split: 60 % training / 40 % validation (test_size=0.4), stratified
# jointly on weekday, hour and month.
strata = df3[['Weekday', 'Hour_Bracket', 'Month']]
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.4, random_state=8, stratify=strata
)

# Feature columns stored with dtype "object", i.e. the ones that need encoding.
object_cols = []
for feature_name in features:
    if df3[feature_name].dtype == "object":
        object_cols.append(feature_name)
object_cols

# Work on copies so the un-encoded splits stay available.
label_X_train = X_train.copy()
label_X_valid = X_valid.copy()

# Integer-code each categorical column: the encoder is fitted on the training
# values of that column, then applied to both splits.
label_encoder = LabelEncoder()
for feature_name in object_cols:
    label_encoder.fit(X_train[feature_name])
    label_X_train[feature_name] = label_encoder.transform(X_train[feature_name])
    label_X_valid[feature_name] = label_encoder.transform(X_valid[feature_name])
def _report_errors(actual, predicted):
    """Print the MAE and RMSE of ``predicted`` against ``actual``."""
    print("MAE", mean_absolute_error(actual, predicted))
    print("RMSE", np.sqrt(mean_squared_error(actual, predicted)))


# Model 1: random forest on the non-categorical features only.
numeric_cols = X_train.columns.difference(object_cols)
forest_model = RandomForestRegressor(random_state=8)
forest_model.fit(X_train[numeric_cols], y_train)
pred = forest_model.predict(X_valid[numeric_cols])
_report_errors(y_valid, pred)

# Model 2: all features, categorical columns label-encoded.
forest_model = RandomForestRegressor(random_state=8)
forest_model.fit(label_X_train, y_train)
pred_labeled1 = forest_model.predict(label_X_valid)
_report_errors(y_valid, pred_labeled1)

# Model 3: all features, categorical columns one-hot encoded.
# NOTE(review): scikit-learn 1.2 renamed `sparse` to `sparse_output` and later
# releases drop the old name; switch the keyword when upgrading.
taxi_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
label2_X_train = pd.DataFrame(taxi_encoder.fit_transform(X_train[object_cols]))
label2_X_valid = pd.DataFrame(taxi_encoder.transform(X_valid[object_cols]))

# Numeric part of the features (the categorical columns are replaced by the
# one-hot block).
num_X_train = X_train.drop(object_cols, axis=1)
num_X_valid = X_valid.drop(object_cols, axis=1)

# The concat below renumbers all columns 0..n-1 with the numeric columns first,
# so Trip_Seconds ends up labelled with its position among the numeric columns.
# Deriving that position replaces the former hard-coded `4`.
trip_seconds_pos = list(num_X_train.columns).index('Trip_Seconds')

# The one-hot frames carry a fresh 0..n-1 row index; align the numeric frames
# to it so the column-wise concat pairs rows by position.
num_X_train.index = label2_X_train.index
num_X_valid.index = label2_X_valid.index
label2_X_train2 = pd.concat([num_X_train, label2_X_train], axis=1, ignore_index=True)
label2_X_valid2 = pd.concat([num_X_valid, label2_X_valid], axis=1, ignore_index=True)

forest_model = RandomForestRegressor(random_state=8)
forest_model.fit(label2_X_train2, y_train)
pred_labeled2 = forest_model.predict(label2_X_valid2)
_report_errors(y_valid, pred_labeled2)

# Model 4: same one-hot features, but without the trip duration (Trip_Seconds).
no_duration_cols = label2_X_train2.columns.difference([trip_seconds_pos])
forest_model = RandomForestRegressor(random_state=8)
forest_model.fit(label2_X_train2[no_duration_cols], y_train)
pred_labeled3 = forest_model.predict(label2_X_valid2[no_duration_cols])
_report_errors(y_valid, pred_labeled3)
label2_X_train2.columns
# Measured vs. predicted price on the validation set, one figure per model.
# The size is passed straight to plt.subplots(): calling plt.figure(figsize=...)
# first only created an extra, empty figure and never sized the real plot.
scatter_specs = [
    (pred, 'Predicted Price (no characteristic variables)'),
    (pred_labeled1, 'Predicted Price (Characteristic Variables to Numerical Variables)'),
    (pred_labeled2, 'Predicted Price (one-hot encoding)'),
    (pred_labeled3, 'Predicted Price (one-hot encoding without Trip Duration)'),
]
# End points of the dashed y = x reference line.
measured_range = [y_valid.min(), y_valid.max()]
for predicted_prices, y_axis_label in scatter_specs:
    fig, ax = plt.subplots(figsize=(32, 20))
    ax.scatter(y_valid, predicted_prices)
    ax.plot(measured_range, measured_range, 'k--', lw=4)
    ax.set_xlabel('Measured Price')
    ax.set_ylabel(y_axis_label)
    plt.show()
# Distribution of the actual validation prices overlaid with the predictions
# of each of the four random-forest models.
fig_dims = (16, 9)
fig, ax = plt.subplots(figsize=fig_dims)
distribution_specs = (
    (y_valid, 'Actual Value'),
    (pred, 'Predicted Price of Model 1'),
    (pred_labeled1, 'Predicted Price of Model 2'),
    (pred_labeled2, 'Predicted Price of Model 3'),
    (pred_labeled3, 'Predicted Price of Model 4'),
)
for price_values, legend_label in distribution_specs:
    sns.distplot(a=price_values, label=legend_label, ax=ax)
# The legend has to be requested explicitly.
ax.legend()
ax.set_xlabel('Price Distribution')
ax.set_title('Histogram of Actual & Predicted Price')
# Helper for visually checking a regression model's predictions.
def plot_prediction_analysis(y, y_pred, figsize=(10,4), title=''):
    """Draw a two-panel diagnostic figure for a set of predictions.

    Left panel: scatter of ``y`` against ``y_pred`` with a red y = x reference
    line spanning the overall min/max of both; its title reports the RMSE and
    the explained variance score.
    Right panel: 50-bin histogram of the errors ``y - y_pred``; its title
    reports their mean and standard deviation.

    Parameters
    ----------
    y : array-like
        Observed target values.
    y_pred : array-like
        Predicted values, element-wise comparable with ``y``.
    figsize : tuple, optional
        Figure size handed to ``plt.subplots``.
    title : str, optional
        Figure-level title; nothing is added when it is the empty string.
    """
    fig, axs = plt.subplots(1, 2, figsize=figsize)

    # Left: predicted vs. observed.
    axs[0].scatter(y, y_pred)
    mn = min(np.min(y), np.min(y_pred))
    mx = max(np.max(y), np.max(y_pred))
    axs[0].plot([mn, mx], [mn, mx], c='red')
    axs[0].set_xlabel('$y$')
    # Raw strings: "\h", "\m" and "\s" are not valid Python escape sequences.
    axs[0].set_ylabel(r'$\hat{y}$')
    rmse = np.sqrt(mean_squared_error(y, y_pred))
    evs = explained_variance_score(y, y_pred)
    axs[0].set_title('rmse = {:.2f}, evs = {:.2f}'.format(rmse, evs))

    # Right: distribution of the prediction error (computed once, reused).
    residual = y - y_pred
    axs[1].hist(residual, bins=50)
    avg = np.mean(residual)
    std = np.std(residual)
    axs[1].set_xlabel(r'$y - \hat{y}$')
    axs[1].set_title(
        r'Histrogram prediction error, $\mu$ = {:.2f}, $\sigma$ = {:.2f}'.format(avg, std)
    )

    if title != '':
        fig.suptitle(title)
def _make_linear_pipeline():
    """Return an unfitted standard-scaler + linear-regression pipeline."""
    # Steps are given as a list, the documented type for Pipeline's `steps`.
    return Pipeline([
        ("standard_scaler", StandardScaler()),
        ("lin_reg", LinearRegression()),
    ])


# Linear model on the full one-hot encoded feature set.
model_lin = _make_linear_pipeline()
model_lin.fit(label2_X_train2, y_train)
y_train_pred = model_lin.predict(label2_X_train2)
plot_prediction_analysis(y_train, y_train_pred, title='Linear Model - Trainingset')
y_valid_pred = model_lin.predict(label2_X_valid2)
plot_prediction_analysis(y_valid, y_valid_pred, title='Linear Model - Validset')

# Same model without the trip duration.  In the concatenated frames the numeric
# columns come first and are renumbered 0..n-1, so Trip_Seconds is labelled with
# its position among the numeric columns (formerly hard-coded as 4).
trip_seconds_pos = list(num_X_train.columns).index('Trip_Seconds')
no_duration_cols = label2_X_train2.columns.difference([trip_seconds_pos])

model_lin = _make_linear_pipeline()
model_lin.fit(label2_X_train2[no_duration_cols], y_train)
y_train_pred = model_lin.predict(label2_X_train2[no_duration_cols])
plot_prediction_analysis(y_train, y_train_pred, title='Linear Model - Trainingset')
y_valid_pred = model_lin.predict(label2_X_valid2[no_duration_cols])
plot_prediction_analysis(y_valid, y_valid_pred, title='Linear Model - Validset')
# Hold-out test set: the 2019 file, prepared with the same steps as the 2018
# data (time features, region lookup, centroid displacement, row filters).
test, meta2 = pyreadstat.read_sas7bdat('taxitrip19.sas7bdat')

# Calendar features taken from the trip start time (isoweekday: 1 = Monday).
test['Weekday'] = test['Trip_Start_Timestamp'].apply(lambda ts: ts.isoweekday())
test['Month'] = test['Trip_Start_Timestamp'].apply(lambda ts: ts.month)
test['Hour_Bracket'] = test['Trip_Start_Timestamp'].apply(lambda ts: ts.hour)

# Elapsed seconds between start and end.  total_seconds() rather than .seconds,
# which only holds the sub-day remainder of the time span.
test['Duration'] = (test['Trip_End_Timestamp'] - test['Trip_Start_Timestamp']).apply(
    lambda delta: delta.total_seconds()
)

# Region lookup reuses the MapRegion table defined for the 2018 data instead of
# repeating an identical literal.  setdefault() keeps the first region listing
# an area, i.e. the same "first match wins" rule as before.
_test_area_to_region = {}
for _region, _areas in MapRegion.items():
    for _area in _areas:
        _test_area_to_region.setdefault(_area, _region)

for _side in ('Pickup', 'Dropoff'):
    _area_col = _side + '_Community_Area'
    # Missing areas become 0 so the column can be cast to int.
    test[_area_col] = np.nan_to_num(test[_area_col]).astype(int)
    # Unmapped area numbers fall back to "Unknown" instead of raising IndexError.
    test[_side + '_Region'] = test[_area_col].apply(
        lambda area: _test_area_to_region.get(area, "Unknown")
    )

# Absolute pickup -> dropoff centroid displacement along each axis.
test['Abs_Diff_Longitude'] = (
    test['Dropoff_Centroid_Longitude'] - test['Pickup_Centroid_Longitude']
).abs()
test['Abs_Diff_Latitude'] = (
    test['Dropoff_Centroid_Latitude'] - test['Pickup_Centroid_Latitude']
).abs()

# Same row filters as the training data: complete rows, total below 50,
# Trip_Seconds below 1000.
test = test.dropna()
test = test[(test['Trip_Total'] < 50) & (test['Trip_Seconds'] < 1000)]

X_test = test[features]
y_test = test['Trip_Total']
# One-hot encode the categorical columns: fitted on the 2018 training split,
# applied to the 2019 test set (unseen categories are ignored).
# NOTE(review): scikit-learn 1.2 renamed `sparse` to `sparse_output` and later
# releases drop the old name; switch the keyword when upgrading.
taxi_encoder = OneHotEncoder(handle_unknown='ignore', sparse=False)
label2_X_train = pd.DataFrame(taxi_encoder.fit_transform(X_train[object_cols]))
label2_X_test = pd.DataFrame(taxi_encoder.transform(X_test[object_cols]))

# Numeric part of the features (categorical columns are replaced by the
# one-hot block).
num_X_train = X_train.drop(object_cols, axis=1)
num_X_test = X_test.drop(object_cols, axis=1)

# After the concat below the numeric columns come first and every column is
# renumbered 0..n-1, so Trip_Seconds is labelled with its position among the
# numeric columns (formerly hard-coded as 4).
trip_seconds_pos = list(num_X_train.columns).index('Trip_Seconds')

# Align row indexes with the freshly built one-hot frames before concatenating.
num_X_train.index = label2_X_train.index
num_X_test.index = label2_X_test.index
label2_X_train2 = pd.concat([num_X_train, label2_X_train], axis=1, ignore_index=True)
label2_X_test2 = pd.concat([num_X_test, label2_X_test], axis=1, ignore_index=True)
no_duration_cols = label2_X_train2.columns.difference([trip_seconds_pos])

# Random forest without the trip duration, evaluated on the 2019 test set.
forest_model = RandomForestRegressor(random_state=8)
forest_model.fit(label2_X_train2[no_duration_cols], y_train)
pred_labeled4 = forest_model.predict(label2_X_test2[no_duration_cols])
print("MAE", mean_absolute_error(y_test, pred_labeled4))
print("RMSE", np.sqrt(mean_squared_error(y_test, pred_labeled4)))

# Linear model without the trip duration, evaluated on the 2019 test set.
# Steps are given as a list, the documented type for Pipeline's `steps`.
model_lin = Pipeline([
    ("standard_scaler", StandardScaler()),
    ("lin_reg", LinearRegression()),
])
model_lin.fit(label2_X_train2[no_duration_cols], y_train)
y_train_pred2 = model_lin.predict(label2_X_train2[no_duration_cols])
# Plot the predictions of THIS model (y_train_pred2); the earlier code passed
# the stale y_train_pred left over from a previous fit.
plot_prediction_analysis(y_train, y_train_pred2, title='Linear Model - Trainingset')
y_test_pred2 = model_lin.predict(label2_X_test2[no_duration_cols])
plot_prediction_analysis(y_test, y_test_pred2, title='Linear Model - Testset')
import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster
# Base map centred on Chicago (shown when run interactively).
m_1 = folium.Map(location=[41.881832, -87.623177], tiles='openstreetmap', zoom_start=10)
m_1

# Validation-set prediction error (actual minus predicted) of the one-hot
# random forest trained without trip duration.  The result keeps y_valid's
# index, so each error can be traced back to its row in df3.
tomap = y_valid - pred_labeled3
tomap.head()

# Distribution of the prediction error, zoomed to +/- 10.
fig_dims = (16, 9)
fig, ax = plt.subplots(figsize=fig_dims)
sns.distplot(a=tomap, label='Error Value', ax=ax)
ax.set(xlim=(-10, 10))
# The legend has to be requested explicitly.
plt.legend()
plt.xlabel('Price Error Distribution')
plt.title('Histogram of Predicted Error')

# Split the validation rows by absolute error.  The groups are disjoint:
# an error of exactly 2.5 counts as accurate only (it used to land in both).
abs_error = np.abs(tomap.values)
acurate = tomap.index[abs_error <= 2.5].tolist()
inacurate = tomap.index[abs_error > 2.5].tolist()
len(acurate)
def _trip_heatmap(trips, weekday, first_hour, end_hour, endpoint):
    """Build a folium heat map of trip locations for one weekday/hour window.

    Parameters
    ----------
    trips : pandas.DataFrame
        Trips to draw; must contain ``Weekday``, ``Hour_Bracket`` and the
        ``<endpoint>_Centroid_Latitude`` / ``<endpoint>_Centroid_Longitude``
        columns.
    weekday : int
        Value of ``Weekday`` to keep (1 = Monday ... 7 = Sunday).
    first_hour, end_hour : int
        Keep trips with ``first_hour <= Hour_Bracket < end_hour``.
    endpoint : str
        ``'Pickup'`` (trip origin) or ``'Dropoff'`` (trip target).

    Returns
    -------
    folium.Map
        Map centred on Chicago with the heat layer added.
    """
    in_window = trips[
        (trips.Weekday == weekday)
        & (trips.Hour_Bracket < end_hour)
        & (trips.Hour_Bracket >= first_hour)
    ]
    heat_map = folium.Map(location=[41.881832, -87.623177], tiles='openstreetmap', zoom_start=10)
    coordinates = in_window[[endpoint + '_Centroid_Latitude', endpoint + '_Centroid_Longitude']]
    HeatMap(data=coordinates, radius=10).add_to(heat_map)
    return heat_map


# Rows of df3 behind the accurate / inaccurate validation predictions.
# `acurate` / `inacurate` hold index LABELS, hence .loc (DataFrame.ix no longer
# exists in pandas >= 1.0).
acuratepred = df3.loc[acurate]
inacuratepred = df3.loc[inacurate]

# Monday Morning (hours 6-9) Accurate Origin
m_2 = _trip_heatmap(acuratepred, 1, 6, 10, 'Pickup')
m_2
# Monday Morning Inaccurate Origin
m_3 = _trip_heatmap(inacuratepred, 1, 6, 10, 'Pickup')
m_3
# Monday Morning Accurate Target
m_4 = _trip_heatmap(acuratepred, 1, 6, 10, 'Dropoff')
m_4
# Monday Morning Inaccurate Target
m_5 = _trip_heatmap(inacuratepred, 1, 6, 10, 'Dropoff')
m_5
# Saturday Night (hours 18-22) Accurate Origin
m_6 = _trip_heatmap(acuratepred, 6, 18, 23, 'Pickup')
m_6
# Saturday Night Inaccurate Origin
m_7 = _trip_heatmap(inacuratepred, 6, 18, 23, 'Pickup')
m_7
# Saturday Night Accurate Target
m_8 = _trip_heatmap(acuratepred, 6, 18, 23, 'Dropoff')
m_8
# Saturday Night Inaccurate Target
m_9 = _trip_heatmap(inacuratepred, 6, 18, 23, 'Dropoff')
m_9